# rename and subset to prep for merging
# For every imported WALS table: apply the prepared column names from `cols`,
# strip the prefix up to the first "-" from the ID, and keep only the
# ID (column 1), Language (column 4) and feature (column 2) columns.
to_merge <- lapply(seq_along(imported), function(i) {
  feature <- imported[[i]]
  names(feature) <- cols[[i]]
  feature$ID <- gsub("^.*?-", "", feature$ID)
  data.frame(feature[1], feature[4], feature[2])
})
# merge and clean up
# Full outer join of all feature tables. The original call passed `id = "ID"`,
# which is not a merge() argument and was silently ignored, so the join ran on
# the shared columns ID and Language; that key is now spelled out explicitly.
wals_data <- Reduce(
  function(x, y) merge(x, y, by = c("ID", "Language"), all = TRUE),
  to_merge
)
rm(to_merge, imported, cols, to_read, heads, vars)
# recode the WALS variables to binary variables
#
# Every WALS feature is collapsed to two categories: the level(s) listed under
# `keep` are retained (optionally relabelled via `label`), and every other
# non-missing level becomes `other`. The rules live in one table so that the
# column name and the retained level are each written exactly once.

# Build one recode rule: `keep` = original level(s), `label` = new label for
# those levels (defaults to the level itself), `other` = label for the rest.
binary_spec <- function(keep, other = "Other", label = keep) {
  list(map = setNames(rep_len(label, length(keep)), keep), other = other)
}

wals_recodes <- list(
  M.T.Pronouns = binary_spec("No M-T pronouns", "Exists"),
  Alignment.of.Verbal.Person.Marking = binary_spec("Accusative"),
  Expression.of.Pronominal.Subjects = binary_spec("Subject affixes on verb"),
  Verbal.Person.Marking = binary_spec("No person marking"),
  Order.of.Person.Markers.on.the.Verb = binary_spec("A and P do not or do not both occur on the verb"),
  Ditransitive.Constructions..The.Verb..Give. = binary_spec("Indirect-object construction"),
  Reciprocal.Constructions = binary_spec("Distinct from reflexive"),
  Passive.Constructions = binary_spec("Present", "Absent"),
  Applicative.Constructions = binary_spec("No applicative construction", "Applicative"),
  Periphrastic.Causative.Constructions = binary_spec("Purposive but no sequential"),
  Nonperiphrastic.Causative.Constructions = binary_spec("Morphological but no compound"),
  Negative.Morphemes = binary_spec("Negative affix"),
  Symmetric.and.Asymmetric.Standard.Negation = binary_spec("Symmetric"),
  Negative.Indefinite.Pronouns.and.Predicate.Negation = binary_spec("Predicate negation also present"),
  Polar.Questions = binary_spec("Question particle"),
  Predicative.Possession = binary_spec("'Have'"),
  Predicative.Adjectives = binary_spec("Nonverbal encoding"),
  Comparative.Constructions = binary_spec("Particle"),
  Relativization.on.Subjects = binary_spec("Relative pronoun"),
  Relativization.on.Obliques = binary_spec("Relative pronoun"),
  X.Want..Complement.Subjects = binary_spec("Subject is left implicit"),
  Purpose.Clauses = binary_spec("Deranked"),
  X.When..Clauses = binary_spec("Deranked"),
  Reason.Clauses = binary_spec("Balanced"),
  Utterance.Complement.Clauses = binary_spec("Balanced"),
  # FIX: this rule previously matched the level `Balanced` (copied from the
  # clause features above) while labelling the result "Complex"; no level
  # matched, so the whole column collapsed to "Other".
  Syllable.Structure = binary_spec("Complex"),
  Numeral.Bases = binary_spec("Decimal"),
  Number.of.Basic.Colour.Categories = binary_spec("11"),
  Tea = binary_spec("Words derived from Min Nan Chinese te"),
  # output label is capitalised differently from the input level
  Tone = binary_spec("No tones", label = "No Tones"),
  Para.Linguistic.Usages.of.Clicks = binary_spec("Affective meanings"),
  Order.of.Negative.Morpheme.and.Verb = binary_spec("[V-Neg]"),
  Preverbal.Negative.Morphemes = binary_spec("NegV"),
  Postverbal.Negative.Morphemes = binary_spec("None"),
  Fixed.Stress.Locations = binary_spec("No fixed stress"),
  Weight.Sensitive.Stress = binary_spec("Fixed stress (no weight-sensitivity)"),
  Weight.Factors.in.Weight.Sensitive.Stress.Systems = binary_spec("Combined"),
  Rhythm.Types = binary_spec("Trochaic"),
  Presence.of.Uncommon.Consonants = binary_spec("'Th' sounds"),
  # two input levels are pooled into one category
  Consonant.Inventories = binary_spec(c("Large", "Moderately large"),
                                      label = "Moderately Large or Large"),
  Fusion.of.Selected.Inflectional.Formatives = binary_spec("Exclusively concatenative"),
  Exponence.of.Selected.Inflectional.Formatives = binary_spec("Monoexponential case"),
  Exponence.of.Tense.Aspect.Mood.Inflection = binary_spec("monoexponential TAM"),
  Inflectional.Synthesis.of.the.Verb = binary_spec("0-1 category per word"),
  Locus.of.Marking.in.the.Clause = binary_spec("Dependent marking"),
  Locus.of.Marking.in.Possessive.Noun.Phrases = binary_spec("Dependent marking"),
  Locus.of.Marking..Whole.language.Typology = binary_spec("Dependent-marking"),
  Prefixing.vs..Suffixing.in.Inflectional.Morphology = binary_spec("Strongly suffixing"),
  Reduplication = binary_spec("No productive reduplication"),
  Case.Syncretism = binary_spec("Core and non-core"),
  Syncretism.in.Verbal.Person.Number.Marking = binary_spec("Syncretic"),
  Vowel.Quality.Inventories = binary_spec("Large (7-14)"),
  Number.of.Genders = binary_spec("None"),
  Sex.based.and.Non.sex.based.Gender.Systems = binary_spec("Sex-based"),
  Systems.of.Gender.Assignment = binary_spec("Semantic and formal"),
  Coding.of.Nominal.Plurality = binary_spec("Plural suffix"),
  Occurrence.of.Nominal.Plurality = binary_spec("Only human nouns, optional"),
  Plurality.in.Independent.Personal.Pronouns = binary_spec("Person-number stem"),
  The.Associative.Plural = binary_spec("No associative plural"),
  Definite.Articles = binary_spec("Definite word distinct from demonstrative"),
  Indefinite.Articles = binary_spec("Indefinite word distinct from 'one'"),
  Inclusive.Exclusive.Distinction.in.Independent.Pronouns = binary_spec("Inclusive/exclusive"),
  # two input levels are pooled into one category
  Consonant.Vowel.Ratio = binary_spec(c("Moderately high", "High"),
                                      label = "Moderately High or High"),
  Inclusive.Exclusive.Distinction.in.Verbal.Inflection = binary_spec("'We' the same as 'I'"),
  Distance.Contrasts.in.Demonstratives = binary_spec("No distance contrast"),
  Pronominal.and.Adnominal.Demonstratives = binary_spec("Identical"),
  Third.Person.Pronouns.and.Demonstratives = binary_spec("Unrelated"),
  Gender.Distinctions.in.Independent.Personal.Pronouns = binary_spec("3rd person singular only"),
  Politeness.Distinctions.in.Pronouns = binary_spec("No politeness distinction", "Exists"),
  Indefinite.Pronouns = binary_spec("Generic-noun-based"),
  Person.Marking.on.Adpositions = binary_spec("No person marking"),
  Number.of.Cases = binary_spec("No morphological case-marking"),
  Voicing.in.Plosives.and.Fricatives = binary_spec("In both plosives and fricatives"),
  Asymmetrical.Case.Marking = binary_spec("Additive-quantitatively asymmetrical"),
  Position.of.Case.Affixes = binary_spec("No case affixes or adpositional clitics"),
  Comitatives.and.Instrumentals = binary_spec("Differentiation"),
  Ordinal.Numerals = binary_spec("First, second, three-th"),
  Distributive.Numerals = binary_spec("No distributive numerals"),
  Numeral.Classifiers = binary_spec("Absent", "Exists"),
  Conjunctions.and.Universal.Quantifiers = binary_spec("Formally different"),
  Position.of.Pronominal.Possessive.Affixes = binary_spec("No possessive affixes", "Exists"),
  Voicing.and.Gaps.in.Plosive.Systems = binary_spec("None missing in /p t k b d g/"),
  Genitives..Adjectives.and.Relative.Clauses = binary_spec("Highly differentiated"),
  Adjectives.without.Nouns = binary_spec("Without marking"),
  Action.Nominal.Constructions = binary_spec("Ergative-Possessive"),
  Noun.Phrase.Conjunction = binary_spec("'And' identical to 'with'"),
  Nominal.and.Verbal.Conjunction = binary_spec("Differentiation"),
  The.Past.Tense = binary_spec("No past tense", "Exists"),
  The.Perfect = binary_spec("No perfect"),
  Position.of.Tense.Aspect.Affixes = binary_spec("Tense-aspect suffixes"),
  Uvular.Consonants = binary_spec("None", "Exists"),
  The.Morphological.Imperative = binary_spec("No second-person imperatives", "Exists"),
  The.Prohibitive = binary_spec("Normal imperative + normal negative"),
  Imperative.Hortative.Systems = binary_spec("Neither type of system", "Some System"),
  Situational.Possibility = binary_spec("Verbal constructions"),
  Epistemic.Possibility = binary_spec("Verbal constructions"),
  Overlap.between.Situational.and.Epistemic.Modal.Marking = binary_spec("Overlap for both possibility and necessity"),
  Semantic.Distinctions.of.Evidentiality = binary_spec("No grammatical evidentials", "Exists"),
  Coding.of.Evidentiality = binary_spec("Modal morpheme"),
  Suppletion.According.to.Tense.and.Aspect = binary_spec("Tense and aspect"),
  Suppletion.in.Imperatives.and.Hortatives = binary_spec("Imperative"),
  Glottalized.Consonants = binary_spec("No glottalized consonants", "Exist"),
  Verbal.Number.and.Suppletion = binary_spec("None", "Exist"),
  Order.of.Subject..Object.and.Verb = binary_spec("SVO"),
  Order.of.Subject.and.Verb = binary_spec("SV"),
  Order.of.Object.and.Verb = binary_spec("VO"),
  Order.of.Object..Oblique..and.Verb = binary_spec("VOX"),
  Order.of.Adposition.and.Noun.Phrase = binary_spec("Postpositions"),
  Order.of.Genitive.and.Noun = binary_spec("Noun-Genitive"),
  Order.of.Adjective.and.Noun = binary_spec("Adjective-Noun"),
  Order.of.Demonstrative.and.Noun = binary_spec("Demonstrative-Noun"),
  Order.of.Numeral.and.Noun = binary_spec("Numeral-Noun"),
  Lateral.Consonants = binary_spec("/l/, no obstruent laterals"),
  Order.of.Relative.Clause.and.Noun = binary_spec("Noun-Relative clause"),
  Order.of.Degree.Word.and.Adjective = binary_spec("Degree word-Adjective"),
  Position.of.Polar.Question.Particles = binary_spec("No question particle", "Exists"),
  Position.of.Interrogative.Phrases.in.Content.Questions = binary_spec("Initial interrogative phrase"),
  Order.of.Adverbial.Subordinator.and.Clause = binary_spec("Initial subordinator word"),
  Relationship.between.the.Order.of.Object.and.Verb.and.the.Order.of.Adposition.and.Noun.Phrase = binary_spec("VO and Prepositions"),
  Relationship.between.the.Order.of.Object.and.Verb.and.the.Order.of.Relative.Clause.and.Noun = binary_spec("VO and NRel"),
  Relationship.between.the.Order.of.Object.and.Verb.and.the.Order.of.Adjective.and.Noun = binary_spec("VO and NAdj"),
  Alignment.of.Case.Marking.of.Full.Noun.Phrases = binary_spec("Nominative - accusative (standard)"),
  Alignment.of.Case.Marking.of.Pronouns = binary_spec("Nominative - accusative (standard)"),
  The.Velar.Nasal = binary_spec("No velar nasal", "Exists")
)

# Fail early with a readable message if a listed feature is absent.
absent_features <- setdiff(names(wals_recodes), names(wals_data))
if (length(absent_features) > 0) {
  stop("WALS columns missing from wals_data: ",
       paste(absent_features, collapse = ", "), call. = FALSE)
}

# Apply every rule with recode(); do.call() supplies the level = label pairs
# as named arguments, exactly as if they had been typed into the call.
for (feature in names(wals_recodes)) {
  spec <- wals_recodes[[feature]]
  wals_data[[feature]] <- do.call(
    recode,
    c(list(wals_data[[feature]]), as.list(spec$map), list(.default = spec$other))
  )
}
rm(binary_spec, wals_recodes, absent_features, feature, spec)
################ RECODE DATA FROM THE WORLD VALUES SURVEY ################
# read in WVS
# Survey answers listed in `na_strings` are read as NA.
setwd("/Users/TomPepinsky/Dropbox/Papers/Sapir-Whorf/code")
na_strings <- c(
  "Don´t know",
  "Missing; Unknown",
  "No answer",
  "Not applicable",
  "Not asked in survey"
)
wvs_data <- read.csv(file = "forR.csv", na.strings = na_strings)
# first, recode the 25 values variables
# recode importance of things
# All six "Important in life" items share one answer scale, mapped to 1-4.
# The columns are recoded in place with lapply(); this replaces the previous
# mutate_at(..., funs(...)) call because funs() is deprecated in dplyr.
importance_items <- c(
  "Important.in.life..Family", "Important.in.life..Friends",
  "Important.in.life..Leisure.time", "Important.in.life..Politics",
  "Important.in.life..Work", "Important.in.life..Religion"
)
wvs_data[importance_items] <- lapply(
  wvs_data[importance_items],
  recode,
  `Not at all important` = 1,
  `Not very important` = 2,
  `Rather important` = 3,
  `Very important` = 4
)
# Remaining values items: each answer label is mapped to a number.
# Labels that are not listed (and no `.default` is given) become NA.

# ---- interest and trust ----
wvs_data[["Interest.in.politics"]] <- recode(
  wvs_data[["Interest.in.politics"]],
  "Not at all interested" = 1, "Not very interested" = 2,
  "Somewhat interested" = 3, "Very interested" = 4
)
wvs_data[["Most.people.can.be.trusted"]] <- recode(
  wvs_data[["Most.people.can.be.trusted"]],
  "Can´t be too careful" = 0, "Most people can be trusted" = 1
)

# ---- ten-point scale with labelled end points ----
wvs_data[["How.much.freedom.of.choice.and.control"]] <- recode(
  wvs_data[["How.much.freedom.of.choice.and.control"]],
  "None at all" = 1,
  "2" = 2, "3" = 3, "4" = 4, "5" = 5, "6" = 6, "7" = 7, "8" = 8, "9" = 9,
  "A great deal" = 10
)

# ---- three- and four-point agreement items ----
wvs_data[["Jobs.scarce..Employers.should.give.priority.to..nation..people.than.immigrants"]] <- recode(
  wvs_data[["Jobs.scarce..Employers.should.give.priority.to..nation..people.than.immigrants"]],
  "Disagree" = 0, "Neither" = 1, "Agree" = 2
)
wvs_data[["Jobs.scarce..Men.should.have.more.right.to.a.job.than.women"]] <- recode(
  wvs_data[["Jobs.scarce..Men.should.have.more.right.to.a.job.than.women"]],
  "Disagree" = 0, "Neither" = 1, "Agree" = 2
)
wvs_data[["Woman.as.a.single.parent"]] <- recode(
  wvs_data[["Woman.as.a.single.parent"]],
  "Disapprove" = 0, "Depends" = 1, "Approve" = 2
)
wvs_data[["Men.make.better.political.leaders.than.women.do"]] <- recode(
  wvs_data[["Men.make.better.political.leaders.than.women.do"]],
  "Strongly disagree" = 0, "Disagree" = 1, "Agree" = 2, "Agree strongly" = 3
)

# ---- indicator items: the named answer is 1, any other answer is 0 ----
wvs_data[["Aims.of.country..first.choice"]] <- recode(
  wvs_data[["Aims.of.country..first.choice"]],
  "A high level of economic growth" = 1, .default = 0
)
wvs_data[["Most.important..first.choice"]] <- recode(
  wvs_data[["Most.important..first.choice"]],
  "A stable economy" = 1, .default = 0
)
wvs_data[["Willingness.to.fight.for.country"]] <- recode(
  wvs_data[["Willingness.to.fight.for.country"]],
  "Yes" = 1, .default = 0
)

# ---- more ten-point scales with labelled end points ----
wvs_data[["Self.positioning.in.political.scale"]] <- recode(
  wvs_data[["Self.positioning.in.political.scale"]],
  "Left" = 1,
  "2" = 2, "3" = 3, "4" = 4, "5" = 5, "6" = 6, "7" = 7, "8" = 8, "9" = 9,
  "Right" = 10
)
wvs_data[["Income.equality"]] <- recode(
  wvs_data[["Income.equality"]],
  "Incomes should be made more equal" = 1,
  "2" = 2, "3" = 3, "4" = 4, "5" = 5, "6" = 6, "7" = 7, "8" = 8, "9" = 9,
  "We need larger income differences as incentives" = 10
)
wvs_data[["Private.vs.state.ownership.of.business"]] <- recode(
  wvs_data[["Private.vs.state.ownership.of.business"]],
  "Private ownership of business should be increased" = 1,
  "2" = 2, "3" = 3, "4" = 4, "5" = 5, "6" = 6, "7" = 7, "8" = 8, "9" = 9,
  "Government ownership of business should be increased" = 10
)
wvs_data[["Government.responsibility"]] <- recode(
  wvs_data[["Government.responsibility"]],
  "People should take more responsibility" = 1,
  "2" = 2, "3" = 3, "4" = 4, "5" = 5, "6" = 6, "7" = 7, "8" = 8, "9" = 9,
  "The government should take more responsibility" = 10
)

# ---- four-point evaluations of political systems ----
wvs_data[["Political.system..Having.a.strong.leader"]] <- recode(
  wvs_data[["Political.system..Having.a.strong.leader"]],
  "Very bad" = 0, "Bad" = 1, "Fairly good" = 2, "Very good" = 3
)
wvs_data[["Political.system..Having.a.democratic.political.system"]] <- recode(
  wvs_data[["Political.system..Having.a.democratic.political.system"]],
  "Very bad" = 0, "Bad" = 1, "Fairly good" = 2, "Very good" = 3
)

# ---- pride, religiosity, happiness ----
wvs_data[["How.proud.of.nationality"]] <- recode(
  wvs_data[["How.proud.of.nationality"]],
  "Not at all proud" = 0, "Not very proud" = 1,
  "Quite proud" = 2, "Very proud" = 3
)
wvs_data[["Religious.person"]] <- recode(
  wvs_data[["Religious.person"]],
  "A convinced atheist" = 0, "Not a religious person" = 1,
  "A religious person" = 2
)
wvs_data[["Feeling.of.happiness"]] <- recode(
  wvs_data[["Feeling.of.happiness"]],
  "Not at all happy" = 0, "Not very happy" = 1,
  "Quite happy" = 2, "Very happy" = 3
)
# now recode the four behavioral outcomes
# Answer labels are mapped to numbers; labels not listed become NA.

# Two holy-day answers share the value 3.
wvs_data[["How.often.do.you.attend.religious.services"]] <- recode(
  wvs_data[["How.often.do.you.attend.religious.services"]],
  "Never practically never" = 0,
  "Less often" = 1,
  "Once a year" = 2,
  "Only on special holy days/Christmas/Easter days" = 3,
  "Other specific holy days" = 3,
  "Once a month" = 4,
  "Once a week" = 5,
  "More than once a week" = 6
)

# Both spellings of the "spent savings and borrowed" answer map to 1.
wvs_data[["Family.savings.during.past.year"]] <- recode(
  wvs_data[["Family.savings.during.past.year"]],
  "Just get by" = 0,
  "Spent savings and borrowed money" = 1,
  "Spent some savings and borrowed money" = 1,
  "Save money" = 2
)

wvs_data[["How.many.children.do.you.have"]] <- recode(
  wvs_data[["How.many.children.do.you.have"]],
  "No child" = 0, "1 child" = 1, "2 children" = 2, "3 children" = 3,
  "4 children" = 4, "5 children" = 5, "6 children" = 6, "7 children" = 7,
  "8" = 8
)

wvs_data[["Political.action..signing.a.petition"]] <- recode(
  wvs_data[["Political.action..signing.a.petition"]],
  "Would never do" = 0, "Might do" = 1, "Have done" = 2
)
# list of languages -> used this to make the crosswalk
# write.table(data.frame(levels(as.factor(wvs_data$Language.in.which.interview.was.conducted))),
#             file="interview_language.csv", sep = ",", row.names=FALSE)
# write.table(data.frame(levels(as.factor(wvs_data$Language.at.home))),
#             file="home_language.csv", sep = ",", row.names=FALSE)
# write.table(data, file="merged_data.csv", sep = ",", row.names=FALSE)
################ MERGE WVS AND WALS AND PREP FOR ANALYSIS ################
# drop observations where interview language is "NA"
# Rows are kept when column 8 (interview sample) or column 43 (home sample)
# is non-missing.
# NOTE(review): columns are addressed by position; presumably 8 is the
# interview language and 43 the home language -- confirm against forR.csv.
wvs_interview <- wvs_data[!is.na(wvs_data[[8]]), ]
wvs_home <- wvs_data[!is.na(wvs_data[[43]]), ]
# merge data
# The crosswalk's first three columns link the two WVS language labels to the
# language name used as the merge key with wals_data.
xwalk <- read.csv("language crosswalk.csv", na.strings = c("", "NA"))
xwalk <- xwalk[, 1:3]
names(xwalk) <- c(
  "Language.in.which.interview.was.conducted",
  "Language.at.home",
  "Language"
)
# Left joins: every WVS respondent is kept whether or not a match exists.
match1 <- merge(
  x = wvs_interview, y = xwalk,
  by = "Language.in.which.interview.was.conducted", all.x = TRUE
)
match2 <- merge(x = wvs_home, y = xwalk, by = "Language.at.home", all.x = TRUE)
matched_wals_wvs_interview <- merge(
  x = match1, y = wals_data, by = "Language", all.x = TRUE
)
matched_wals_wvs_home <- merge(
  x = match2, y = wals_data, by = "Language", all.x = TRUE
)
# break the WALS IVs for the analysis into chunks to avoid taxing the memory
# select only variables that vary across respondents in the sample
wals_col_names <- names(wals_data)
# For each WALS column: count of the first level, count of the second level
# (NA when there is none) and the number of levels, all taken from a single
# frequency table of the interview-language sample.
dimensions <- lapply(wals_col_names, function(col) {
  counts <- table(matched_wals_wvs_interview[, col])
  c(counts[1], counts[2], length(counts))
})
to_bind <- t(matrix(unlist(dimensions), nrow = 3))
# cbind() with the names yields a character matrix:
# column 1 = name, 2 = first count, 3 = second count, 4 = number of levels
indexed <- cbind(wals_col_names, to_bind)
# Keep columns with exactly two levels, both observed. drop = FALSE keeps the
# result a matrix even when only one column qualifies; without it the
# `toselect[, 1]` below fails on the resulting plain vector.
toselect <- indexed[
  indexed[, 4] == 2 & indexed[, 2] != 0 & indexed[, 3] != 0, ,
  drop = FALSE
]
# chunks of (at most) five variable names
xs <- split(toselect[, 1], ceiling(seq_along(toselect[, 1]) / 5))
# a vector of 25 possible values DVs
# (column names of the recoded WVS values items, one per line)
possible_dvs <- c(
  "Important.in.life..Family",
  "Important.in.life..Friends",
  "Important.in.life..Leisure.time",
  "Important.in.life..Politics",
  "Important.in.life..Work",
  "Important.in.life..Religion",
  "Interest.in.politics",
  "Most.people.can.be.trusted",
  "How.much.freedom.of.choice.and.control",
  "Jobs.scarce..Employers.should.give.priority.to..nation..people.than.immigrants",
  "Jobs.scarce..Men.should.have.more.right.to.a.job.than.women",
  "Woman.as.a.single.parent",
  "Men.make.better.political.leaders.than.women.do",
  "Aims.of.country..first.choice",
  "Most.important..first.choice",
  "Willingness.to.fight.for.country",
  "Self.positioning.in.political.scale",
  "Income.equality",
  "Private.vs.state.ownership.of.business",
  "Government.responsibility",
  "Political.system..Having.a.strong.leader",
  "Political.system..Having.a.democratic.political.system",
  "How.proud.of.nationality",
  "Religious.person",
  "Feeling.of.happiness"
)
# a vector of 4 possible behavioral DVs
possible_dvs_behave <- c(
  "How.often.do.you.attend.religious.services",
  "Family.savings.during.past.year",
  "How.many.children.do.you.have",
  "Political.action..signing.a.petition"
)
# Frequency table of home languages, restricted to respondents whose
# `Language` value is present and non-empty. which() drops the NA comparisons
# explicitly instead of relying on NA subscripts being discarded by table().
lang_table <- data.frame(table(
  matched_wals_wvs_home$Language.at.home[which(matched_wals_wvs_home$Language != "")]
))
names(lang_table) <- c("Language.at.home", "Frequency")
# Attach the crosswalk, then drop the interview-language column and any
# language with no respondents.
lang_table_merged <- merge(lang_table, xwalk, by = "Language.at.home", all.x = TRUE)
lang_table_merged <- lang_table_merged[
  , names(lang_table_merged) != "Language.in.which.interview.was.conducted"
]
lang_table_merged <- lang_table_merged[lang_table_merged$Frequency != 0, ]
names(lang_table_merged) <- c("Language (WVS Name)", "Frequency", "Language (WALS Name)")
# seq_len() is safe for an empty table, where 1:nrow() would yield c(1, 0).
row.names(lang_table_merged) <- seq_len(nrow(lang_table_merged))
# `type` is an argument of print.xtable(), not xtable(), where it was ignored.
print(xtable(lang_table_merged), type = "latex")
# Exploratory checks of the language variables before and after matching.
table(matched_wals_wvs_home$Language.at.home)
matched_wals_wvs_interview$Language.in.which.interview.was.conducted
table(matched_wals_wvs_interview$Language.in.which.interview.was.conducted)
table(matched_wals_wvs_interview$Language.at.home.y)
table(wvs_data$Language.at.home)
# Home languages of respondents interviewed in "Jewish". A first attempt
# that subscripted the vector with `[cond, ]` was removed: it stops with
# "incorrect number of dimensions".
table(wvs_data$Language.at.home[wvs_data$Language.in.which.interview.was.conducted == "Jewish"])
table(matched_wals_wvs_home$Language.at.home[matched_wals_wvs_home$Language == ""])
# Home languages with a missing `Language` value. `== NA` always evaluates to
# NA and therefore selected nothing; is.na() is the correct test.
table(matched_wals_wvs_home$Language.at.home[is.na(matched_wals_wvs_home$Language)])
data.frame(table(matched_wals_wvs_home$Language.at.home[is.na(matched_wals_wvs_home$Language)]))
table(matched_wals_wvs_home$Language.at.home)
data.frame(table(matched_wals_wvs_home$Language.at.home))
lang_table
lang_table_merged
# Frequency table of ALL home languages in the home-language sample
# (no filter on `Language`), written to languages.csv and printed as LaTeX.
lang_table <- data.frame(table(matched_wals_wvs_home$Language.at.home))
names(lang_table) <- c("Language.at.home", "Frequency")
lang_table_merged <- merge(lang_table, xwalk, by = "Language.at.home", all.x = TRUE)
lang_table_merged <- lang_table_merged[
  , names(lang_table_merged) != "Language.in.which.interview.was.conducted"
]
lang_table_merged <- lang_table_merged[lang_table_merged$Frequency != 0, ]
names(lang_table_merged) <- c("Language (WVS Name)", "Frequency", "Language (WALS Name)")
# seq_len() is safe for an empty table, where 1:nrow() would yield c(1, 0).
row.names(lang_table_merged) <- seq_len(nrow(lang_table_merged))
write.table(lang_table_merged, file = "languages.csv", row.names = FALSE, sep = ",")
# `type` is an argument of print.xtable(), not xtable(), where it was ignored.
print(xtable(lang_table_merged), type = "latex")
# Frequency table of home languages, restricted to respondents whose
# `Language` value is present and non-empty; overwrites languages.csv.
# which() drops the NA comparisons explicitly instead of relying on NA
# subscripts being discarded by table().
lang_table <- data.frame(table(
  matched_wals_wvs_home$Language.at.home[which(matched_wals_wvs_home$Language != "")]
))
names(lang_table) <- c("Language.at.home", "Frequency")
lang_table_merged <- merge(lang_table, xwalk, by = "Language.at.home", all.x = TRUE)
lang_table_merged <- lang_table_merged[
  , names(lang_table_merged) != "Language.in.which.interview.was.conducted"
]
lang_table_merged <- lang_table_merged[lang_table_merged$Frequency != 0, ]
names(lang_table_merged) <- c("Language (WVS Name)", "Frequency", "Language (WALS Name)")
# seq_len() is safe for an empty table, where 1:nrow() would yield c(1, 0).
row.names(lang_table_merged) <- seq_len(nrow(lang_table_merged))
write.table(lang_table_merged, file = "languages.csv", row.names = FALSE, sep = ",")
# `type` is an argument of print.xtable(), not xtable(), where it was ignored.
print(xtable(lang_table_merged), type = "latex")
# Coverage summary: compare the filtered language table with the table of all
# home languages. `lang_table_all` is built once and each quantity is printed
# once; the repeated, identical recomputations were removed.
nrow(lang_table_merged)
nrow(lang_table)
sum(lang_table$Frequency)
sum(lang_table_merged$Frequency)
lang_table_all <- data.frame(table(matched_wals_wvs_home$Language.at.home))
lang_table_all
sum(lang_table_all$Freq)
# percentage of respondents counted in the filtered table
100 * sum(lang_table$Frequency) / sum(lang_table_all$Freq)
nrow(lang_table_all)
# number of home languages absent from the merged table
nrow(lang_table_all) - nrow(lang_table_merged)
# percentage of respondents not counted in the filtered table
100 - 100 * sum(lang_table$Frequency) / sum(lang_table_all$Freq)
# load libraries
library(tidyverse)
library(broom)
library(plyr)
library(sandwich)
library(lmtest)
library(lme4)
library(xtable)
library(reshape)
################ GATHER, MERGE, AND RECODE DATA FROM THE WORLD ATLAS OF LANGUAGE STRUCTURES ################
setwd("/Users/TomPepinsky/Dropbox/Papers/Sapir-Whorf/code/wals data")
# get all the .csv files and read them in
# `pattern` is a regular expression, not a shell glob: "*.csv" also matched
# names that merely contain "csv" after some character (e.g. "a.csv.bak").
# Anchoring on the extension selects only files ending in ".csv".
to_read <- list.files(pattern = "\\.csv$")
imported <- lapply(to_read, read.csv)
# gathers column names
# The value in row 1, column 3 of each file becomes the name of that file's
# second column; the remaining columns get the fixed names in `heads`.
vars <- vapply(
  imported,
  function(feature) as.character(feature[1, 3]),
  character(1)
)
heads <- c("Parameter", "Language", "Frequency", "Confidence", "References")
cols <- lapply(vars, function(feature_name) c("ID", feature_name, heads))
# rename and subset to prep for merging
# For every imported WALS table: apply the prepared column names from `cols`,
# strip the prefix up to the first "-" from the ID, and keep only the
# ID (column 1), Language (column 4) and feature (column 2) columns.
to_merge <- lapply(seq_along(imported), function(i) {
  feature <- imported[[i]]
  names(feature) <- cols[[i]]
  feature$ID <- gsub("^.*?-", "", feature$ID)
  data.frame(feature[1], feature[4], feature[2])
})
# merge and clean up
# Full outer join of all feature tables. The original call passed `id = "ID"`,
# which is not a merge() argument and was silently ignored, so the join ran on
# the shared columns ID and Language; that key is now spelled out explicitly.
wals_data <- Reduce(
  function(x, y) merge(x, y, by = c("ID", "Language"), all = TRUE),
  to_merge
)
rm(to_merge, imported, cols, to_read, heads, vars)
# Quick look at the merged WALS data: level counts for two features and the
# full list of column names.
table(wals_data[["M.T.Pronouns"]])
table(wals_data[["Number.of.Genders"]])
colnames(wals_data)
